import requests, json
import pandas as pd
import time
import re
from bs4 import BeautifulSoup
# Search endpoint used by getdata(). The `keyword` query parameter is the
# percent-encoded search term; the query also fixes page=1, page_size=20 and
# sort=time_descending.
# NOTE(review): search_id / session_id look like values captured from a single
# app session — confirm they are still accepted before reusing this URL.
url = "https://edith.xiaohongshu.com/api/sns/v10/search/notes?keyword=%E8%9B%8B%E4%BB%94%E6%B4%BE%E5%AF%B9bug&filters=%5B%5D&sort=time_descending&page=1&page_size=20&source=explore_feed&search_id=2az85h6q1ioazhkm3ebcw%402az85jjwcv96g8j65nvgg&session_id=2az85fkkk3zw8hgd7gtfk&api_extra=&page_pos=0&pin_note_id=&allow_rewrite=1&geo=&loaded_ad=&query_extra_info=&rec_extra_params=&preview_ad=&scene=&is_optimize=0&location_permission=0&device_level=4"

# Empty placeholder; no code visible in this file reads this module-level name
# (getdata1 defines its own local `payload`).
payload={}
# HTTP headers sent with the search request issued by getdata().
# NOTE(review): the x-legacy-*, x-mini-*, shield and xy-* values appear to be
# device/session identifiers and request signatures captured from a mobile-app
# session; presumably they expire and should not live in source control —
# TODO confirm and move them to configuration.
headers = {
   'Host': 'edith.xiaohongshu.com',
   'xy-direction': '11',
   'x-b3-traceid': '0808bd81581f080f',
   'x-legacy-smid': '202212051843105efbc130454c104b527cad69be39129f01ad5ed99d207ff8',
   'x-legacy-did': '4640d58a-4def-3fcd-84ff-a9a455b00ec2',
   'x-legacy-fid': '1670236988102597a32b43b7093305fbcc2f2ee6d8b5',
   'x-legacy-sid': 'session.1670237031775590380086',
   'x-mini-mua': 'eyJhIjoiRUNGQUFGMDEiLCJjIjo1NywiZiI6NCwiayI6ImFkMjkwMjdkNGJjOTg0OThmOTVmNDQxNTIyNTExNDBjOWUxZWRiOWIwMTVhZmE1NTgwNmMxYWM3ZTFjNWIwN2YiLCJwIjoiYSIsInMiOiJmN2E1MzgzOWYzZjY1NmRkYzhmZDBjMWQ2ZmU3NjQ1NSIsInQiOnsiYyI6MTAsImQiOjAsInMiOjQwOTgsInQiOjEwNDA0MDAzODYsInR0IjpbMV19LCJ1IjoiMDAwMDAwMDBkOTM1Mzc4MWNkMmEyNzEwOWIyNGE4NmE3NjVmODZmNiIsInYiOiIxLjguNiJ9.dm3d_qtd-FZrQcctpf6Z_Zydl5lMtWSoBzmp47iTulDw-ZwquxE015WVEfzIMvHkzxd1b2NYJ9lKGjHazivlj207aeX0PebAPJ58qfrf2xLEzkihTZG5JyxuIAr467PaXOk8fma1ytUtVnuVHqlLFtnquhmTgbx26FjneN7r-sez7YTcE6OrT_WkY722jVIQumOKibnC5ejx9x8GRH8ChyKwP8RkW5dzjTxZV77cbT8RmN1mnfRU0KTEMPfK7a3CjYDaUN1TkwmGTwiyuuPbsBjN8ppP5erLm5wQJXysqO2nAahb3dpcbsn_Fm0bL6Ag7U2dD0rVScAlxwb0wxFCK7vJLWb7LubBPMJ4gGKH15Gz2KnhB7k3rZ5mUwdd1z_-3r_7gOBshv1pbhigvZBhuROHDn7nVR6UMnWNWFTV7wBKIdmLAixtx5f64wBPJ7dTXNZLPCy_4761Z85TypoSs4MpRiCB_J27RfHRTx3ZjnaDcpOaD0DVWWuJRseWmEbZiGJYVQjfo2WfdtQazrN7xQfXK9A1F1IZNr0ShaKFnBM9N_gOSZp6DSA6JIgS0PsN5TA07ovuqWUYY-QjMeENPV2kihDycwPrlEqMhB5xGbK51g122J-ond0LuJvh51tkE4YHIxqKgE8GNuzE11Xe7h0jOwNpyOxITdsRp_cNAQh0CBUKU_JQuti4cSUviFSnqx1D2G7w_xyfStjt21IiUFAn6YDY9ZQwSulPhAVDLQkoOGU9uIzVBEwLnykZMaWuJ3gldMZe19TOKJfGv3PBqE4JensQk4n_MDKcgMYuQ89HSc2r5Gn0SwTOjFC59_F0DECRu9iDkS_L3o6Gs1XQtS-ITUcLv4Y9vA4wo9-DOOHolHABUdOvxWWFghAL0uYJqYsTE1DIr_8dj-thyeV0BPjmICbnolZp6vykK6RsgBGLKP-wtiK1sigk_ZaWSyjB3HKS2AIfeT_r5luj16ZM_b5HYhkUYjNIB13aF5a_9fUX7gIQB1yAKKSgUbaP2b3UW8XZAFLLy61uEJSVGQVrpLjqXwEDEzHzY28cETtPWXhD4sSPI9FrTpyuwTIQ2L8_GBUshlpafgqEYxyXw-G2tL8i5qoDoAJqru2qzdoZVr8LtKMKuItlDXGIXt5CLeMrjEwlwCtmf_GDpeEa45I3FAFwgwUqrsnRHmzXzmLTqjA.',
   'x-mini-gid': '7d21b69c5c755514ba9201d33d09e3e2056ffea947359b6177b896a6',
   'x-mini-sig': '83d63c53a8ebb8de5ce02d0ddf24c6d933e4bada174f26c9d5c36dba2ec6e61a',
   'xy-common-params': 'deviceId=4640d58a-4def-3fcd-84ff-a9a455b00ec2&cpu_name=&identifier_flag=0&tz=Asia%2FShanghai&fid=1670236988102597a32b43b7093305fbcc2f2ee6d8b5&app_id=ECFAAF01&device_fingerprint1=202212051843105efbc130454c104b527cad69be39129f01ad5ed99d207ff8&uis=light&launch_id=1670295961&project_id=ECFAAF&device_fingerprint=202212051843105efbc130454c104b527cad69be39129f01ad5ed99d207ff8&versionName=7.65.5.1&platform=android&gid=7d21b69c5c755514ba9201d33d09e3e2056ffea947359b6177b896a6&sid=session.1670237031775590380086&t=1670296012&build=7655002&x_trace_page_current=search_result_notes&lang=zh-Hans&device_model=pad&teenager=0&channel=sogouBZ',
   'user-agent': 'Dalvik/2.1.0 (Linux; U; Android 7.1.2; 10X Build/NZH54D) Resolution/1080*2340 Version/7.65.5.1 Build/7655002 Device/(XIAOMI;10X) discover/7.65.5.1 NetType/WiFi',
   'referer': 'https://app.xhs.cn/',
   'shield': 'XYAAAAAQAAAAEAAABTAAAAUzUWEe0xG1IbD9/c+qCLOlKGmTtFa+lG43EIeeRWRK9Gw4CwyrNvHZ38q7UJz8N8j5l+2aZkRgwfGDSMYb6m3Cs11eaABu9n6uqCpTKXcPbtBxNT',
   'xy-platform-info': 'platform=android&build=7655002&deviceId=4640d58a-4def-3fcd-84ff-a9a455b00ec2',
   'Accept': '*/*',
   'Connection': 'keep-alive'
}


# Module-level accumulators that getdata() appends to. Despite the names,
# like_list receives note links and play_list receives formatted publish
# times; title_list receives note titles.
title_list, like_list, play_list = [],[],[]
# Strip the parts of the page markup that are not needed.
def resub(item):
    """Return ``item`` with site-specific markup removed.

    Every line that begins with ``<span>`` is deleted (including its trailing
    newline), the ``<em class="keyword">`` / ``</em>`` search-highlight tags
    are dropped, and ``&quot;`` entities become plain double quotes.
    """
    # Only lines that *start* with <span> are removed, hence MULTILINE + '^'.
    cleaned = re.sub(r'^<span>.*\n?', '', item, flags=re.MULTILINE)

    # The remaining substitutions are plain literals, applied in a fixed order.
    literal_replacements = (
        ('<em class=\"keyword\">', ''),  # opening search-keyword highlight tag
        ('</em>', ''),                   # closing highlight tag
        ('&quot;', '"'),                 # HTML entity for a double quote
    )
    for needle, substitute in literal_replacements:
        cleaned = cleaned.replace(needle, substitute)
    return cleaned

def getdata(shu):
    """Fetch the search results at ``url`` and print the collected notes.

    For every item in the response's ``data.items`` the note title, a link
    built from the note id, and the publish time formatted in local time are
    appended to the module-level ``title_list``, ``like_list`` and
    ``play_list``. Afterwards the first ``shu`` collected entries are printed
    (fewer if fewer are available).

    Args:
        shu: Maximum number of collected entries to print. Despite the
            original comment, it is not a page number; the page is fixed by
            the query string in ``url``.
    """
    # A timeout keeps a stalled connection from hanging the script forever.
    res = requests.get(url, headers=headers, timeout=10).content.decode('utf-8')

    # Parse the response body as JSON.
    jsonfile = json.loads(res)

    # A missing or empty 'data' section simply means nothing to collect.
    # Adjust the extracted fields below to your own needs.
    data = jsonfile.get('data')
    if data:
        for content in data['items']:
            note = content['note']
            title_list.append(note['title'])  # title
            like_list.append(
                'https://www.xiaohongshu.com/discovery/item/' + note['id']
            )  # link
            # Convert the timestamp into a readable local-time string.
            play_list.append(
                time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(note['timestamp']))
            )  # publish time

    # Never print more entries than were collected: indexing with range(shu)
    # raised IndexError whenever fewer than shu notes came back. max() keeps
    # a negative shu printing nothing, as range() did.
    limit = max(shu, 0)
    for title, published, link in zip(
        title_list[:limit], play_list[:limit], like_list[:limit]
    ):
        print('标题：' + title)
        print('时间：' + published)
        print('链接：' + link)
        print('')
def getdata1(name):
    """Fetch the tag page for ``name`` and collect the notes listed on it.

    The page carries its data as ``window.__INITIAL_SSR_STATE__=...`` inside
    the ``<script>`` element selected by ``body > script:nth-child(3)``. That
    JSON is parsed, the entries under ``keywordLayout.notes`` are read, and
    every note title is printed.

    The module-level ``title_list`` / ``like_list`` / ``play_list`` are not
    modified; this function works on its own local lists.

    Args:
        name: Tag name placed in the ``name`` query parameter.

    Returns:
        A ``(title_list, like_list, play_list)`` tuple of parallel lists
        holding the note titles, note links and the notes' ``time`` values.
        All three are empty when the state script is missing or lists no
        notes. (Previously the lists were built and then discarded.)
    """
    from urllib.parse import quote

    # Escape characters such as '&', '#' or spaces that would otherwise cut
    # the query string short. '%' is left alone so that names which are
    # already percent-encoded keep working as before.
    url = (
        "https://www.xiaohongshu.com/mobile/tags/sougood.top?name="
        + quote(name, safe='%')
    )

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36',
        'Accept': '*/*',
        'Host': 'www.xiaohongshu.com',
        'Connection': 'keep-alive',
    }

    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, headers=headers, timeout=10)
    bes = BeautifulSoup(response.text, "lxml")

    title_list, like_list, play_list = [], [], []

    # Without the state script there is nothing to parse; the old
    # str(list)/[0] approach raised IndexError in that case.
    scripts = bes.select('body > script:nth-child(3)')
    if not scripts:
        return title_list, like_list, play_list

    # Peel the JavaScript assignment and the tag markup off the JSON payload.
    raw = str(scripts[0])
    raw = raw.replace('<script>window.__INITIAL_SSR_STATE__=', "")
    raw = raw.replace('</script>', "")
    notes = json.loads(raw)['keywordLayout']['notes']

    # Adjust the extracted fields below to your own needs.
    for content in notes or []:
        print(content['title'])

        title_list.append(content['title'])  # title
        like_list.append('https://www.xiaohongshu.com/explore/' + content['id'])  # link
        play_list.append(content['time'])  # publish time, stored as provided

    return title_list, like_list, play_list

# Fetch the search results (the hard-coded URL requests page 1) and print the
# first 20 collected entries.
getdata(20)



# The latest n videos



